# coding=utf-8
# 把目录列表转为xml结构

import re
from xml.dom import minidom

# Read the plain-text table of contents.
a = r'C:\Users\Administrator\Desktop\ff\sp3.txt'  # input
with open(a) as f:  # close the handle as soon as the text has been read
    s = f.read()

# A TOC entry is a whole line that begins with a dotted section number
# ("1", "2.3", "4.1.2", ...) followed by at least one more character.
# NOTE: the lookarounds require a newline on both sides, so an entry on the
# very first line, or on a final line with no trailing newline, is not
# picked up.
toc_line = re.compile(r'(?<=\n)\d+(?:\.\d+)*.+(?=\n)')
l = toc_line.findall(s)

# Strip exactly the matched entries in one pass. Substring replacement per
# entry would also delete the same text inside unrelated lines and is
# quadratic in the number of entries.
s = toc_line.sub('', s)
# Collapse the runs of blank lines left behind, then show whatever text was
# NOT recognised as a TOC entry so it can be checked by eye.
s = re.sub(r'([\r\n])+\s*\1+', r'\1', s)
print(s)

# Split every TOC line into a (section number, title, page number) tuple of
# strings, e.g. "1.2 Some title 15" -> ('1.2', 'Some title', '15').
entry_re = re.compile(r'(\d+(?:\.\d+)*)\s*(.+?)\s*(\d+)\s*$')
ll = []

for i in l:
    m = entry_re.match(i)
    if m is None:
        # A numbered line that does not end in a page number cannot be
        # turned into a bookmark; report it instead of failing with an
        # AttributeError on None.
        print('Skipping line without a page number: ' + i)
        continue
    ll.append(m.groups())

# Build the bookmark tree: one element per TOC entry, nested according to
# the depth of its section number ("1" -> depth 1, "1.2" -> depth 2, ...).
doc = minidom.Document()
root = doc.createElement('文档书签')

# Stack of (depth, element) for the currently open ancestors; the root sits
# at depth 0 and is never popped because every entry has depth >= 1.
# Tracking the depth explicitly removes the former four-level limit and
# keeps an entry whose parent level is missing (e.g. "2.1.1" right after
# "2") attached to its nearest real ancestor.
stack = [(0, root)]
for number, title, page in ll:
    depth = number.count('.') + 1

    node = doc.createElement('书签')
    node.setAttribute('文本', number + ' ' + title)
    node.setAttribute('页码', page)
    node.setAttribute('动作', '转到页面')

    # Close every open node that is a sibling of, or deeper than, this one.
    while stack[-1][0] >= depth:
        stack.pop()
    stack[-1][1].appendChild(node)
    stack.append((depth, node))

doc.appendChild(root)

# Serialize the tree and echo it for inspection.
s = doc.toprettyxml()
print(s)

a = r'C:\Users\Administrator\Desktop\ff\sp3.xml'  # output

# The serialized text is treated as UTF-8 bytes and transcoded to GB2312
# before being written. The with-block guarantees the file is flushed and
# closed even if the transcoding raises.
# NOTE(review): toprettyxml() is called without an encoding, so the XML
# declaration does not name GB2312 -- confirm the consuming tool expects
# that. Characters outside GB2312 will raise UnicodeEncodeError here.
with open(a, 'w') as f:
    f.write(s.decode('utf-8').encode('gb2312'))
